#################################3
###   Edcuation protject       ###
###   Jonne Kamphorst          ###
###   2022                     ###
###   Liss analysis            ###
###   Version: August 15       ###   
#################################3



###Libraries and data###
library(dplyr)
library(plm)
library(lmtest)
library(estimatr)
library(ggeffects)
library(extrafont)
library(stargazer)
library(ggplot2)
library(tidyverse)
library(texreg)
library(modelsummary)
library(plm)
library(ggpubr)
library(viridis)
library(did2s)
library(PanelMatch)
library(lme4)
library(gridExtra)
library(fect)
library(panelView)
library(haven)

#font_import()
loadfonts(quiet = T, device = "pdf")
windowsFonts(Georgia = windowsFont("Georgia")) #load fonts for windows machines


## Set ggplot theme with the Georgia font
theme_set(theme_light(base_family = "Georgia", base_size = 12))
# Also add:  axis.text=element_text(size=11), legend.text=element_text(size=12)



load("LISS/full_panel_withvars.Rda")
acq <- read_dta("LISS/wj18a_EN_1.0p.dta")






######### Add acquaintances data to the other LISS data ##################



# Keep closest years only for full panel
full_panel <- full_panel %>% filter(year %in% c(11,10,12,9,13))

# Add new variable that sorts by the best year to match on
full_panel$matchyear <- NA
full_panel$matchyear[full_panel$year==11] <- 1
full_panel$matchyear[full_panel$year==10] <- 2
full_panel$matchyear[full_panel$year==12] <- 3
full_panel$matchyear[full_panel$year==9] <- 4
full_panel$matchyear[full_panel$year==13] <- 5

# Sort dataset on this variable
full_panel <- full_panel[order(full_panel$matchyear), ]

# Join the two dataframes together
full_panel <- acq %>% left_join(full_panel, by = c("nomem_encr"))

# For the control variables where we have missigness, pull the missing values from the other waves
full_panel <- full_panel %>% tidyr::fill(total_ties, nettoink_stand, age10, 
                                               urban_dummy, migration_background, 
                                             employed, education, pol_int, parents_HU_anyw, number_high_ties,
                                             .direction=c("downup"))


# Only keep the best matching year (the closest year)
full_panel <- full_panel %>% distinct(nomem_encr, .keep_all=T)

table(full_panel$matchyear) # Most matches from the same wave as the aqc survey has been done in, some from other waves

rm(acq)






######### Add variables to the acq data ##################
## Independent variable: tie completed education levels (ties are 18+)

#wj18a191 - wj18a215 (1 = primary educ; 4 = higher prof; 5 = uni)

full_panel$wj18a191 <- as.numeric(full_panel$wj18a191)
full_panel$wj18a192 <- as.numeric(full_panel$wj18a192)
full_panel$wj18a193 <- as.numeric(full_panel$wj18a193)
full_panel$wj18a194 <- as.numeric(full_panel$wj18a194)
full_panel$wj18a195 <- as.numeric(full_panel$wj18a195)
full_panel$wj18a196 <- as.numeric(full_panel$wj18a196)
full_panel$wj18a197 <- as.numeric(full_panel$wj18a197)
full_panel$wj18a198 <- as.numeric(full_panel$wj18a198)
full_panel$wj18a199 <- as.numeric(full_panel$wj18a199)
full_panel$wj18a200 <- as.numeric(full_panel$wj18a200)
full_panel$wj18a201 <- as.numeric(full_panel$wj18a201)
full_panel$wj18a202 <- as.numeric(full_panel$wj18a202)
full_panel$wj18a203 <- as.numeric(full_panel$wj18a203)
full_panel$wj18a204 <- as.numeric(full_panel$wj18a204)
full_panel$wj18a205 <- as.numeric(full_panel$wj18a205)
full_panel$wj18a206 <- as.numeric(full_panel$wj18a206)
full_panel$wj18a207 <- as.numeric(full_panel$wj18a207)
full_panel$wj18a208 <- as.numeric(full_panel$wj18a208)
full_panel$wj18a209 <- as.numeric(full_panel$wj18a209)
full_panel$wj18a210 <- as.numeric(full_panel$wj18a210)
full_panel$wj18a211 <- as.numeric(full_panel$wj18a211)
full_panel$wj18a212 <- as.numeric(full_panel$wj18a212)
full_panel$wj18a213 <- as.numeric(full_panel$wj18a213)
full_panel$wj18a214 <- as.numeric(full_panel$wj18a214)
full_panel$wj18a215 <- as.numeric(full_panel$wj18a215)

# Mean educ for each respondent's acq network

full_panel$mean_educ <- rowMeans(full_panel[, c("wj18a191", "wj18a192", "wj18a193", "wj18a194",
                                    "wj18a195", "wj18a196", "wj18a197", "wj18a198",
                                    "wj18a199", "wj18a200", "wj18a201", "wj18a202",
                                    "wj18a203", "wj18a204", "wj18a205", "wj18a206",
                                    "wj18a207", "wj18a208", "wj18a209", "wj18a210",
                                    "wj18a211", "wj18a212", "wj18a213", "wj18a214",
                                    "wj18a215")], na.rm = TRUE)

# mean educ for close network

full_panel$mean_educ_close <- rowMeans(full_panel[, c("pers1_edu", "pers2_edu", "pers3_edu", "pers4_edu",
                                          "pers5_edu")], na.rm = TRUE)



# Add the thermostat outcomes (we need these instead of which party vote for because starting in wave 17 half of the respondents see a different question that asks about percentage chance to vote for a given party)
full_panel$term_prog <- rowMeans(full_panel[, c("term_gl", "term_d66")], na.rm = TRUE)
full_panel$term_rrp <- rowMeans(full_panel[, c("term_pvv", "term_fvd")], na.rm = TRUE)


## Dummy-up the treatment variable
# mean_educ
full_panel$mean_educ_4bins <- ntile(full_panel$mean_educ, 4)
full_panel$mean_educ_4bins <- as.factor(full_panel$mean_educ_4bins)



######### Run the analysis ##################
### With a dummied-up variable (as requested by reviewer, now main model in the paper)
full_panel <- full_panel %>% filter(ineducatio==0)
x <- lm(eu_progressive_high ~ mean_educ_4bins + total_ties + nettoink_stand + age10 + 
          urban_dummy + migration_background + employed + education + pol_int + parents_HU_anyw, data = full_panel)
x1 <- lm(immigration_icw_outcome ~ mean_educ_4bins + total_ties + nettoink_stand + age10 + 
           urban_dummy + migration_background + employed + education + pol_int + parents_HU_anyw, data =  full_panel)
x2 <- lm(term_rrp ~ mean_educ_4bins + total_ties + nettoink_stand + age10 + 
           urban_dummy + migration_background + employed + education + pol_int + parents_HU_anyw, data =  full_panel) 
x3 <- lm(term_prog ~ mean_educ_4bins + total_ties + nettoink_stand + age10 + 
           urban_dummy + migration_background + employed + education + pol_int + parents_HU_anyw, data =  full_panel) 
x4 <- lm(redis ~ mean_educ_4bins + total_ties + nettoink_stand + age10 + 
           urban_dummy + migration_background + employed + education + pol_int + parents_HU_anyw, data =  full_panel) 



## And a regression table for the bins
model <- texreg(l=list(x, x1, x2, x3, x4), use.packages=F,
                stars=c(0.05, 0.01, 0.001), 
                digits=3, include.ci = FALSE, booktabs=T,
                custom.model.names = c("DV: EU", "Immigration", "Voting RRP", "Voting Progressive", "Redistribution"),
                custom.coef.names=c("Intercept", "Mean edu of network, bin 2", "Mean edu of network, bin 3", "Mean edu of network, bin 4", "Total ties (in other models)", "Income (standardized)", "Age (by 10 years)", "Urban", "Migration background", "Employed", "Education (continuous)", "Political interest", "Parents(s) higher educated"),
                reorder.coef=c(2,3,4,5,6,7,8,9,10,11,12,13,1),
                caption.above=T,
                label="main_liss_acq",
                caption="LISS - Effect of network education among acquaintances on attitudes and voting",
                threeparttable = TRUE,
                float.pos="htpb!",
                custom.note="\\item %stars. The progressive thermostat is the mean score among GL and D66. The RRP thermostat is the mean score among the PVV and FvD. The EU and redistribution variables are measured on a 5-step Likert scales and the variable is an ICW index (from 0 to 1). The bins are of equal size. The baseline is the first bin.")

print(model, file = "LISS/tables/taba2_main_liss_acq_bins.tex")
# Table a2






# Visualize the models without controlling for close ties
eu_means <- ggeffects::ggpredict(x, terms = c("mean_educ_4bins [1,2,3,4]"))
eu_means <- eu_means %>% rename(mean_educ_4bins =x )
eu_means <- ggplot() +
  geom_pointrange(data=eu_means, aes(x=mean_educ_4bins, y=predicted, ymin = conf.low, ymax = conf.high), size=1, linewidth=1) +
  labs(x="Mean education level of network (4 bins)", y="Predicted outcome", subtitle = "DV: EU attitudes (0-4)")

immi_means <- ggeffects::ggpredict(x1, terms = c("mean_educ_4bins [1,2,3,4]"))
immi_means <- immi_means %>% rename(mean_educ_4bins =x )
immi_means <- ggplot() +
  geom_pointrange(data=immi_means, aes(x=mean_educ_4bins, y=predicted, ymin = conf.low, ymax = conf.high), size=1, linewidth=1) +
  labs(x="Mean education level of network (4 bins)", y="Predicted outcome", subtitle = "DV: Immigration attitudes (ICW index)")

rrp_means <- ggeffects::ggpredict(x2, terms = c("mean_educ_4bins [1,2,3,4]"))
rrp_means <- rrp_means %>% rename(mean_educ_4bins =x )
rrp_means <- ggplot() +
  geom_pointrange(data=rrp_means, aes(x=mean_educ_4bins, y=predicted, ymin = conf.low, ymax = conf.high), size=1, linewidth=1) +
  labs(x="Mean education level of network (4 bins)", y="Predicted outcome", subtitle = "DV: Thermostat for RRP parties (0-10)")

prog_means <- ggeffects::ggpredict(x3, terms = c("mean_educ_4bins [1,2,3,4]"))
prog_means <- prog_means %>% rename(mean_educ_4bins =x )
prog_means <- ggplot() +
  geom_pointrange(data=prog_means, aes(x=mean_educ_4bins, y=predicted, ymin = conf.low, ymax = conf.high), size=1, linewidth=1) +
  labs(x="Mean education level of network (4 bins)", y="Predicted outcome", subtitle = "DV: Thermostat for progressive parties (0-10)")

redis_means <- ggeffects::ggpredict(x4, terms = c("mean_educ_4bins [1,2,3,4]"))
redis_means <- redis_means %>% rename(mean_educ_4bins =x )
redis_means <- ggplot() +
  geom_pointrange(data=redis_means, aes(x=mean_educ_4bins, y=predicted, ymin = conf.low, ymax = conf.high), size=1, linewidth=1) +
  labs(x="Mean education level of network (4 bins)", y="Predicted outcome", subtitle = "DV: Redistribution (0-4)")




plots <- grid.arrange(eu_means, prog_means, immi_means, rrp_means, nrow = 2, ncol=2)
plots
ggsave(plot=plots, "LISS/plots/fig2_acq_plots_dums.png", dpi = 600, width=9, height=8)
# Figure 2


